group_by(y) %>%
mutate(pct01 = length(t[t>2.576])/length(t))
results_behave <- data.frame(results_behave)
to.order <- aggregate(results_behave, by=list(results_behave$y),
FUN=mean, na.rm=TRUE)
to.order$Group.1 <- reorder(to.order$Group.1, -to.order$pct01)
results_behave$orderedys <- results_behave$y
results_behave$orderedys <- factor(results_behave$orderedys,
levels = names(table(to.order$Group.1)))  # Reordering group factor levels
################ NOW CREATE PLOTS ################
setwd(paste0(path.name, "/output"))
# all values results
p <- ggplot(data = results_home, mapping = aes(x=t))
p + geom_density(color="gray", fill="gray") +
geom_vline(aes(xintercept=median(t))) + geom_vline(aes(xintercept=2.576), color="black", linetype="dashed") +
theme_classic() +   xlab("|t-statistic| of linguistic feature variable") + ylab("Density") +
annotate("text", x=c(10,10), y=c(.13,.15), label=c("median |t-statistic|", "t = 2.576"), color=c("black","black")) +
annotate("segment", x=5, xend=3.5, y=.13, yend=.13, arrow=arrow(length=unit(.05, "inches"))) +
annotate("segment", x=5, xend=2.8, y=.15, yend=.15, color="black", arrow=arrow(length=unit(.05, "inches")))
p_out <- p + geom_density(color="gray", fill="gray") +
geom_vline(aes(xintercept=median(t))) + geom_vline(aes(xintercept=2.576), color="black", linetype="dashed") +
theme_classic() +   xlab("|t-statistic| of linguistic feature variable") + ylab("Density") +
annotate("text", x=c(10,10), y=c(.13,.15), label=c("median |t-statistic|", "t = 2.576"), color=c("black","black")) +
annotate("segment", x=6, xend=3.5, y=.13, yend=.13, arrow=arrow(length=unit(.05, "inches"))) +
annotate("segment", x=6, xend=2.8, y=.15, yend=.15, color="black", arrow=arrow(length=unit(.05, "inches")))
ggsave("figure1.pdf", plot=p_out, height=3, width=6, units="in")
# values results by DVs
p + geom_density(color="gray", fill="gray") + ylab("Density") +
theme_classic() + facet_wrap(~orderedys, ncol = 5) +
geom_vline(aes(xintercept=med_t)) + geom_vline(aes(xintercept=2.576), color="black", linetype="dashed")  +
xlab("|t-statistic| of linguistic feature variable") +
theme(strip.background = element_rect(colour="white", fill="white"))
p_out <- p + geom_density(color="gray", fill="gray") + ylab("Density") +
theme_classic() + facet_wrap(~orderedys, ncol = 5) +
geom_vline(aes(xintercept=med_t)) + geom_vline(aes(xintercept=2.576), color="black", linetype="dashed")  +
xlab("|t-statistic| of linguistic feature variable") +
theme(strip.background = element_rect(colour="white", fill="white"))
ggsave("figure2.pdf", plot=p_out, height=4, width=6, units="in")
# behavioral results by DVs
p <- ggplot(data = results_behave, mapping = aes(x=t))
p + geom_density(color="gray", fill="gray") + ylab("Density") +
theme_classic() + facet_wrap(~orderedys, ncol = 2) +
geom_vline(aes(xintercept=med_t)) + geom_vline(aes(xintercept=2.576), color="black", linetype="dashed")  +
xlab("|t-statistic| of linguistic feature variable") +
theme(strip.background = element_rect(colour="white", fill="white"))
p_out <- p + geom_density(color="gray", fill="gray") + ylab("Density") +
theme_classic() + facet_wrap(~orderedys, ncol = 2) +
geom_vline(aes(xintercept=med_t)) + geom_vline(aes(xintercept=2.576), color="black", linetype="dashed")  +
xlab("|t-statistic| of linguistic feature variable") +
theme(strip.background = element_rect(colour="white", fill="white"))
ggsave("figure3.pdf", plot=p_out, height=2.5, width=6, units="in")
# APPENDIX FIGURE A1
# comparison of language at home and language of interview
p <- ggplot(data = compare, mapping = aes(x=t.home,y=t.interview))
p + geom_point(shape=1, color="black", fill="white") + theme_classic() +
geom_hline(aes(yintercept=2.576), color="black", linetype="dashed") + geom_vline(aes(xintercept=2.576), color="black", linetype="dashed") +
ylab("|t-statistic| using Language of Interview") + xlab("|t-statistic| using Language at Home") +
scale_x_log10(breaks = scales::trans_breaks("log10", function(x) 10^x),
labels = scales::trans_format("log10", scales::math_format(10^.x))) +
scale_y_log10(breaks = scales::trans_breaks("log10", function(x) 10^x),
labels = scales::trans_format("log10", scales::math_format(10^.x)))
p_out <- p + geom_point(shape=1, color="black", fill="white") + theme_classic() +
geom_hline(aes(yintercept=2.576), color="black", linetype="dashed") + geom_vline(aes(xintercept=2.576), color="black", linetype="dashed") +
ylab("|t-statistic| using Language of Interview (Log scale)") + xlab("|t-statistic| using Language at Home (Log scale)") +
scale_x_log10(breaks = scales::trans_breaks("log10", function(x) 10^x),
labels = scales::trans_format("log10", scales::math_format(10^.x))) +
scale_y_log10(breaks = scales::trans_breaks("log10", function(x) 10^x),
labels = scales::trans_format("log10", scales::math_format(10^.x)))
ggsave("figureA1.pdf", plot=p_out, height=4, width=6, units="in")
################ AND HERE IS HOW WE FIX IT ################
setwd(paste0(path.name, "/output"))
results_to_fix <- rbind(read.csv("results_home.csv"),read.csv("results_behave.csv"))
most_sig <- head(arrange(results_to_fix, -t), 50)
to_fix <- most_sig[,1:2]
# twoway clustering
models_fixed <- to_fix %>%
# Build a formula for each combination
mutate(formula = map2(x, y, ~ as.formula(paste(.y, "~ as.factor(", .x,") + factor(Employment.status) +
Sex + as.numeric(Age) + I(as.numeric(Age)^2) + factor(Marital.status) + factor(Scale.of.incomes) +
factor(Highest.educational.level.attained) + factor(Social.class..subjective.) +
factor(Country.region) + factor(Year.survey)" )))) %>%
# Run a model for each formula
mutate(model = map(formula, ~ lm(.x, data = matched_wals_wvs_home))) %>%
# extract s.e.
mutate(vmat = map(model, vcovCL, cluster = ~ Country.region + Year.survey, multi0 = TRUE))
totidy <- lapply(seq(nrow(most_sig)), function(x) coeftest(models_fixed$model[[x]], models_fixed$vmat[[x]]))
tidied <- map(totidy, tidy)
tidied_res <- lapply(seq(nrow(most_sig)), function(x) c(tidied[[x]]$estimate[2],
tidied[[x]]$statistic[2]))
tidied_res <- data.frame(t(matrix(unlist(tidied_res), ncol = nrow(to_fix), byrow = FALSE)))
tidied_res <- cbind(to_fix, tidied_res)
names(tidied_res) <- c("y","x","b","t")
tidied_res$t <- abs(tidied_res$t)
tidied_res$Method <- "Clustered"
tidied_res$Model <- seq(1:nrow(tidied_res))
most_sig$Method <- "Standard"
most_sig$Model <- seq(1:nrow(tidied_res))
combined <- rbind(most_sig, tidied_res)
# not run: to compare cluster results to unadjusted results
#fixplot <- ggplot(data=combined, aes(x = Model, y=t, fill=Method))
#fixplot + coord_flip() + geom_bar(stat="identity", position=position_dodge()) +
#    scale_fill_manual(values=c("darkblue","lightblue")) +
#    ylab("|t-statistic|") + geom_hline(aes(yintercept=2.576), color="red") + theme_classic() +
#    annotate("text", x=18, y=20, label="t = 2.576", color="red") +
#    annotate("segment", x=17.9, xend=17.9, y=15.5, yend=3, color="red", arrow=arrow(length=unit(.05, "inches"))) +
#  theme(legend.position = c(0.85, 0.8), axis.text.y=element_blank())
#fixplot_out <- fixplot + coord_flip() + geom_bar(stat="identity", position=position_dodge()) +
#  scale_fill_manual(values=c("darkblue","lightblue")) +
#  ylab("|t-statistic|") + geom_hline(aes(yintercept=2.576), color="red") + theme_classic() +
#  annotate("text", x=18, y=20, label="t = 2.576", color="red") +
#  annotate("segment", x=17.9, xend=17.9, y=15.5, yend=3, color="red", arrow=arrow(length=unit(.05, "inches"))) +
#  theme(legend.position = c(0.85, 0.8), axis.text.y=element_blank())
#ggsave("cluster_results.pdf", plot=fixplot_out, height=4, width=4, units="in")
# multilevel modeling
models_fixed_multi <- to_fix %>%
# Build a formula for each combination
mutate(formula = map2(x, y, ~ as.formula(paste(.y, "~ as.factor(", .x,") + factor(Employment.status) +
Sex + as.numeric(Age) + I(as.numeric(Age)^2) + factor(Marital.status) + factor(Scale.of.incomes) +
factor(Highest.educational.level.attained) + factor(Social.class..subjective.) +
factor(Country.region) + factor(Year.survey) +
(1 | Language.at.home)")))) %>%
# Run a model for each formula
mutate(model = map(formula, ~ lmer(.x, data = matched_wals_wvs_home))) %>%
# Extract model elements
mutate(model_tidy = map(model, tidy))
multi_res <- lapply(seq(nrow(most_sig)), function(x) c(models_fixed_multi$model_tidy[[x]]$estimate[2],
models_fixed_multi$model_tidy[[x]]$statistic[2]))
multi_res <- data.frame(t(matrix(unlist(multi_res), ncol = nrow(to_fix), byrow = FALSE)))
multi_res <- cbind(to_fix, multi_res)
names(multi_res) <- c("y","x","b","t")
multi_res$t <- abs(multi_res$t)
multi_res$Method <- "Multilevel"
multi_res$Model <- seq(1:nrow(multi_res))
most_sig$Method <- "Standard"
most_sig$Model <- seq(1:nrow(multi_res))
combined_multi <- rbind(most_sig, multi_res)
# not run: to compare MLM results to unadjusted results
#fixplot_multi <- ggplot(data=combined_multi, aes(x = Model, y=t, fill=Method))
#fixplot_multi + coord_flip() + geom_bar(stat="identity", position=position_dodge()) +
#  scale_fill_manual(values=c("darkblue","lightblue")) +
#  ylab("|t-statistic|") + geom_hline(aes(yintercept=2.576), color="red") + theme_classic() +
#  annotate("text", x=18, y=20, label="t = 2.576", color="red") +
#  annotate("segment", x=17.9, xend=17.9, y=15.5, yend=3, color="red", arrow=arrow(length=unit(.05, "inches"))) +
#  theme(legend.position = c(0.85, 0.8), axis.text.y=element_blank())
#fixplot_multi_out <- fixplot_multi + coord_flip() + geom_bar(stat="identity", position=position_dodge()) +
#  scale_fill_manual(values=c("darkblue","lightblue")) +
#  ylab("|t-statistic|") + geom_hline(aes(yintercept=2.576), color="red") + theme_classic() +
#  annotate("text", x=18, y=20, label="t = 2.576", color="red") +
#  annotate("segment", x=17.9, xend=17.9, y=15.5, yend=3, color="red", arrow=arrow(length=unit(.05, "inches"))) +
#  theme(legend.position = c(0.85, 0.8), axis.text.y=element_blank())
#ggsave("multi_results.pdf", plot=fixplot_multi_out, height=4, width=4, units="in")
# some descriptive statistics on the three sets of results
min(most_sig$t)
median(most_sig$t)
median(na.omit(tidied_res$t))
median(multi_res$t)
format(dt(min(most_sig$t),Inf))
dt(median(na.omit(tidied_res$t)),Inf)
dt(median(multi_res$t),Inf)
### CREATE FIGURE 4
fixed_to_plot <- data.frame(cbind(seq(1:nrow(most_sig)),most_sig$t,tidied_res$t,multi_res$t))
names(fixed_to_plot) <- c("Mod","Standard","Clustered","Multilevel")
fixed_to_plot <- melt(fixed_to_plot,id=c("Mod","Standard"))
names(fixed_to_plot) <- c("Mod","Standard","Method","Fixed")
fixed_plot <- ggplot(data=fixed_to_plot, aes(x = Fixed, y=Standard, color=Method))
fixed_plot + geom_point()  + theme_classic() +
geom_vline(aes(xintercept=2.576), linetype="dashed") + theme(legend.position = c(0.9, 0.7)) +
scale_color_manual(values=c("black", "gray")) +
annotate("text", x=8, y=21, label="t = 2.576") +
annotate("segment", x=6, xend=3, y=21, yend=21, arrow=arrow(length=unit(.05, "inches"))) +
ylab("|t-statistic|, Unadjusted Models") + xlab("|t-statistic|, Adjusted Models")
fixed_plot_out <- fixed_plot + geom_point()  + theme_classic() +
geom_vline(aes(xintercept=2.576), linetype="dashed") + theme(legend.position = c(0.9, 0.8)) +
scale_color_manual(values=c("black", "gray")) +
annotate("text", x=5, y=21, label="t = 2.576") +
annotate("segment", x=4, xend=2.7, y=21, yend=21, arrow=arrow(length=unit(.05, "inches"))) +
ylab("|t-statistic|, Unadjusted Models") + xlab("|t-statistic|, Adjusted Models")
ggsave("figure4.pdf", plot=fixed_plot_out, height=3, width=5, units="in")
######## APPENDIX C: SIMULATIONS, ETC. ###################
set.seed(14850)
lang.list <- names(table(matched_wals_wvs_home$Language.at.home))
to_simulate <- function() {
# assign 1/0 variable to all languages at random
random.features <- data.frame(cbind(lang.list,rbinom(length(lang.list),1,.5)))
names(random.features) <- c("Language.at.home","random.feature")
matched_wals_wvs_home_random <- merge(random.features, matched_wals_wvs_home, all.x=TRUE, by="Language.at.home")
# randomly choose a DV
random.dv <- sample(c(18:23,25:35,37:42,44:45),1)
matched_wals_wvs_home_random$to.fit <- unlist(matched_wals_wvs_home_random[random.dv])
# a function that estimates each model
toreturn <- tryCatch(
{
fit.lm <- lm(to.fit ~ random.feature + factor(Social.class..subjective.) + factor(Employment.status) +
Sex + as.numeric(Age) + I(as.numeric(Age)^2) + factor(Marital.status) + factor(Scale.of.incomes) +
factor(Highest.educational.level.attained)  +
factor(Country.region) + factor(Year.survey), data=matched_wals_wvs_home_random)
vcov_twoway <- cluster.vcov(fit.lm, cbind(matched_wals_wvs_home_random$Country.region, matched_wals_wvs_home_random$Year.survey))
cl2 <- coeftest(fit.lm, vcov_twoway)
fit.lmer <- lmer(to.fit ~ random.feature + factor(Social.class..subjective.) + factor(Employment.status) +
Sex + as.numeric(Age) + I(as.numeric(Age)^2) + factor(Marital.status) + factor(Scale.of.incomes) +
factor(Highest.educational.level.attained)  +
factor(Country.region) + factor(Year.survey) + (1 | Language.at.home), data=matched_wals_wvs_home_random)
c(summary(fit.lm)$coefficients[2,3],
cl2[2,3],
summary(fit.lmer)$coefficients[2,3])
},
error=function(cond) {
c(NA,NA,NA)
}
)
return(toreturn)
}
# run the simulation
reps <- replicate(250,to_simulate())
# organize the results and check how significant
results <- na.omit(data.frame(t(reps)))
length(results[,1][abs(results[,1])>2.576])/length(results[,1])
length(results[,2][abs(results[,2])>2.576])/length(results[,2])
length(results[,3][abs(results[,3])>2.576])/length(results[,3])
# create Figure C1
to.plot <- melt(results)
to.plot$value <- abs(to.plot$value)
p <- ggplot(to.plot,aes(x=value, fill=variable))
p + geom_density(alpha=0.75, color=NA) +
geom_vline(aes(xintercept=2.576), linetype="dashed") + theme_classic() +
xlab("|t-statistic| of randomly generated linguistic feature") + ylab("Density") +
annotate("text", x=c(6.2), y=c(.4), label=c("t = 2.576")) +
scale_fill_manual(values = c("black", "gray50", "gray80"),
name = "Method", labels = c("OLS with Fixed Effects", "Twoway Clustered SEs", "Multilevel Model")) +
theme(legend.position = c(0.8, 0.5)) +
annotate("segment", x=5, xend=2.8, y=.4, yend=.4, arrow=arrow(length=unit(.05, "inches")))
p_out <- p + geom_density(alpha=0.75, color=NA) +
geom_vline(aes(xintercept=2.576), linetype="dashed") + theme_classic() +
xlab("|t-statistic| of randomly generated linguistic feature") + ylab("Density") +
annotate("text", x=c(6.2), y=c(.4), label=c("t = 2.576")) +
scale_fill_manual(values = c("black", "gray50", "gray80"),
name = "Method", labels = c("OLS with Fixed Effects", "Twoway Clustered SEs", "Multilevel Model")) +
theme(legend.position = c(0.8, 0.5)) +
annotate("segment", x=5, xend=2.8, y=.4, yend=.4, arrow=arrow(length=unit(.05, "inches")))
ggsave("figureC1.pdf", plot=p_out, height=3, width=6, units="in")
##### THESE TABLES CHECK TO IF ESTIMATES THAT WE THINK SHOULD BE SIGNIFICANT ACTUALLY ARE
# TABLE C1: saving
tidyfit <- tidy(lm(Family.savings.during.past.year ~ factor(Employment.status) + factor(Scale.of.incomes)  +
factor(Social.class..subjective.) + Politeness.Distinctions.in.Pronouns +
Sex + as.numeric(Age) + I(as.numeric(Age)^2) + factor(Marital.status) + factor(Highest.educational.level.attained) +
factor(Country.region) + factor(Year.survey), data=matched_wals_wvs_home))
fit <- lm(Family.savings.during.past.year ~ factor(Employment.status) + factor(Scale.of.incomes)  +
factor(Social.class..subjective.) + Politeness.Distinctions.in.Pronouns +
Sex + as.numeric(Age) + I(as.numeric(Age)^2) + factor(Marital.status) + factor(Highest.educational.level.attained) +
factor(Country.region) + factor(Year.survey), data=matched_wals_wvs_home)
fit.lmer <- lmer(Family.savings.during.past.year ~ factor(Employment.status) + factor(Scale.of.incomes)  +
factor(Social.class..subjective.) + Politeness.Distinctions.in.Pronouns +
Sex + as.numeric(Age) + I(as.numeric(Age)^2) + factor(Marital.status) + factor(Highest.educational.level.attained) +
factor(Country.region) + factor(Year.survey) + (1 | Language.at.home), data=matched_wals_wvs_home)
vcov_oneway <- cluster.vcov(fit, matched_wals_wvs_home$Country.region)
vcov_twoway <- cluster.vcov(fit, cbind(matched_wals_wvs_home$Country.region, matched_wals_wvs_home$Year.survey))
stargazer.labels <- c("Housewife","Other","Part-Time","Retired","Self-Employed","Student","Unemployed")
stargazer.columns <- c("OLS", "Twoway Clustered SEs", "Multilevel Model")
stargazer(fit, coeftest(fit, vcov_twoway), fit.lmer,
keep=c("Employment.status"), covariate.labels = stargazer.labels, model.names=FALSE,
dep.var.labels.include=FALSE, dep.var.caption="",
no.space=TRUE, star.cutoffs = c(0.05, 0.01, 0.001),
column.labels = stargazer.columns, keep.stat = c("n"), label="table:c_t1",
out="tableC1.tex")
stargazer(fit, coeftest(fit, vcov_twoway), fit.lmer,
keep=c("Employment.status"), covariate.labels = stargazer.labels, model.names=FALSE,
dep.var.labels.include=FALSE, dep.var.caption="",
no.space=TRUE, star.cutoffs = c(0.05, 0.01, 0.001),
column.labels = stargazer.columns, keep.stat = c("n"), label="table:c_t1",
out="tableC1.html")
# TABLE C2: social class and left-right positioning
tidyfit <- tidy(lm(Self.positioning.in.political.scale ~ factor(Social.class..subjective.) + Politeness.Distinctions.in.Pronouns + factor(Employment.status) +
Sex + as.numeric(Age) + I(as.numeric(Age)^2) + factor(Marital.status) + factor(Scale.of.incomes) +
factor(Highest.educational.level.attained)  +
factor(Country.region) + factor(Year.survey), data=matched_wals_wvs_home))
fit <- lm(Self.positioning.in.political.scale ~ factor(Social.class..subjective.) + Politeness.Distinctions.in.Pronouns + factor(Employment.status) +
Sex + as.numeric(Age) + I(as.numeric(Age)^2) + factor(Marital.status) + factor(Scale.of.incomes) +
factor(Highest.educational.level.attained)  +
factor(Country.region) + factor(Year.survey), data=matched_wals_wvs_home)
fit.lmer <- lmer(Self.positioning.in.political.scale ~ factor(Social.class..subjective.) + Politeness.Distinctions.in.Pronouns + factor(Employment.status) +
Sex + as.numeric(Age) + I(as.numeric(Age)^2) + factor(Marital.status) + factor(Scale.of.incomes) +
factor(Highest.educational.level.attained)  +
factor(Country.region) + factor(Year.survey) + (1 | Language.at.home), data=matched_wals_wvs_home)
vcov_oneway <- cluster.vcov(fit, matched_wals_wvs_home$Country.region)
vcov_twoway <- cluster.vcov(fit, cbind(matched_wals_wvs_home$Country.region, matched_wals_wvs_home$Year.survey))
stargazer.labels <- c("Lower middle class","Upper class","Upper  middle class","Working class")
stargazer.columns <- c("OLS", "Twoway Clustered SEs", "Multilevel Model")
stargazer(fit, coeftest(fit, vcov_twoway), fit.lmer,
keep=c("Social.class..subjective"), covariate.labels = stargazer.labels, model.names=FALSE,
dep.var.labels.include=FALSE, dep.var.caption="",
no.space=TRUE, star.cutoffs = c(0.05, 0.01, 0.001),
column.labels = stargazer.columns, keep.stat = c("n"), label="table:c_t2",
out="tableC2.tex")
stargazer(fit, coeftest(fit, vcov_twoway), fit.lmer,
keep=c("Social.class..subjective"), covariate.labels = stargazer.labels, model.names=FALSE,
dep.var.labels.include=FALSE, dep.var.caption="",
no.space=TRUE, star.cutoffs = c(0.05, 0.01, 0.001),
column.labels = stargazer.columns, keep.stat = c("n"), label="table:c_t2",
out="tableC2.html")
# TABLE C3: incomes and preferences for inequality
matched_wals_wvs_home <- within(matched_wals_wvs_home, Scale.of.incomes <- factor(Scale.of.incomes))
matched_wals_wvs_home <- within(matched_wals_wvs_home, Scale.of.incomes <- relevel(Scale.of.incomes, ref = "Lower step"))
tidyfit <- tidy(lm(Income.equality ~ factor(Scale.of.incomes)  +
factor(Social.class..subjective.) + Politeness.Distinctions.in.Pronouns + factor(Employment.status) +
Sex + as.numeric(Age) + I(as.numeric(Age)^2) + factor(Marital.status) + factor(Highest.educational.level.attained) +
factor(Country.region) + factor(Year.survey), data=matched_wals_wvs_home))
fit <- lm(Income.equality ~ factor(Scale.of.incomes)  +
factor(Social.class..subjective.) + Politeness.Distinctions.in.Pronouns + factor(Employment.status) +
Sex + as.numeric(Age) + I(as.numeric(Age)^2) + factor(Marital.status) + factor(Highest.educational.level.attained) +
factor(Country.region) + factor(Year.survey), data=matched_wals_wvs_home)
fit.lmer <- lmer(Income.equality ~ factor(Scale.of.incomes)  +
factor(Social.class..subjective.) + Politeness.Distinctions.in.Pronouns + factor(Employment.status) +
Sex + as.numeric(Age) + I(as.numeric(Age)^2) + factor(Marital.status) + factor(Highest.educational.level.attained) +
factor(Country.region) + factor(Year.survey) + (1 | Language.at.home), data=matched_wals_wvs_home)
vcov_oneway <- cluster.vcov(fit, matched_wals_wvs_home$Country.region)
vcov_twoway <- cluster.vcov(fit, cbind(matched_wals_wvs_home$Country.region, matched_wals_wvs_home$Year.survey))
stargazer.labels <- c("Level 2","Level 3","Level 4","Level 5","Level 6","Level 7","Level 8","Level 9","Level 10")
stargazer.columns <- c("OLS", "Twoway Clustered SEs", "Multilevel Model")
stargazer(fit, coeftest(fit, vcov_twoway), fit.lmer,
keep=c("Scale.of.incomes"), model.names=FALSE,
dep.var.labels.include=FALSE, dep.var.caption="",
column.labels = stargazer.columns, keep.stat = c("n"),
order=c(5,9,3,2,7,6,1,4,8), covariate.labels = stargazer.labels,
no.space=TRUE, star.cutoffs = c(0.05, 0.01, 0.001),
label="table:c_t3",
out="tableC3.tex")
stargazer(fit, coeftest(fit, vcov_twoway), fit.lmer,
keep=c("Scale.of.incomes"), model.names=FALSE,
dep.var.labels.include=FALSE, dep.var.caption="",
column.labels = stargazer.columns, keep.stat = c("n"),
order=c(5,9,3,2,7,6,1,4,8), covariate.labels = stargazer.labels,
no.space=TRUE, star.cutoffs = c(0.05, 0.01, 0.001),
label="table:c_t3",
out="tableC3.html")
getwd()
results_home <- read.csv(file=paste0(path.name, "/output/results_home.csv"), sep=",")
# relabel results
results_home$y <- recode(results_home$y, `Important.in.life..Family` = "Imp.: Family",
`Important.in.life..Friends` = "Imp: Friends",
`Important.in.life..Leisure.time` = "Imp: Leisure",
`Important.in.life..Politics` = "Imp: Politics",
`Important.in.life..Work` = "Imp: Work",
`Important.in.life..Religion` = "Imp: Religion",
`Interest.in.politics` = "Political Interest",
`Most.people.can.be.trusted` = "Trust People",
`How.much.freedom.of.choice.and.control` = "Free. v. Control",
`Jobs.scarce..Employers.should.give.priority.to..nation..people.than.immigrants` = "Nation First",
`Jobs.scarce..Men.should.have.more.right.to.a.job.than.women` = "Men First",
`Woman.as.a.single.parent` = "Single Parent",
`Men.make.better.political.leaders.than.women.do` = "Male Leaders",
`Aims.of.country..first.choice` = "Strong Economy",
`Most.important..first.choice` = "Stable Economy",
`Willingness.to.fight.for.country` = "Willing to Fight",
`Self.positioning.in.political.scale` = "Left-Right",
`Income.equality` = "Inc. Inequality",
`Private.vs.state.ownership.of.business` = "Bus. v. State",
`Government.responsibility` = "Govt. Resp.",
`Political.system..Having.a.strong.leader` = "Strong Leader",
`Political.system..Having.a.democratic.political.system` = "Democracy",
`How.proud.of.nationality` = "National Pride",
`Religious.person` = "Religiosity",
`Feeling.of.happiness` = "Happiness")
# calculate the median t-stat per dv
results_home <- results_home %>%
group_by(y) %>%
mutate(med_t = median(t))
# calculate the proportion of results that are significant at p < .01, and order
# dvs by that proportion
results_home <- results_home %>%
group_by(y) %>%
mutate(pct01 = length(t[t>2.576])/length(t))
results_home <- data.frame(results_home)
to.order <- aggregate(results_home, by=list(results_home$y),
FUN=mean, na.rm=TRUE)
to.order$Group.1 <- reorder(to.order$Group.1, -to.order$pct01)
results_home$orderedys <- results_home$y
results_home$orderedys <- factor(results_home$orderedys,
levels = names(table(to.order$Group.1)))  # Reordering group factor levels
# get effect sizes relative to SD of each DV
for.sd <- cbind(matched_wals_wvs_home[17:22],matched_wals_wvs_home[24:34],
matched_wals_wvs_home[36:41],matched_wals_wvs_home[43:44])
melt.for.sd <- na.omit(melt(for.sd))
melt.for.sd$value <- as.numeric(melt.for.sd$value)
sds <- data.frame(
melt.for.sd %>%
dplyr::group_by(variable) %>%
summarize(sd(value))
)
names(sds) <- c("y","sd")
sds$y <- recode(sds$y, `Important.in.life..Family` = "Imp.: Family",
`Important.in.life..Friends` = "Imp: Friends",
`Important.in.life..Leisure.time` = "Imp: Leisure",
`Important.in.life..Politics` = "Imp: Politics",
`Important.in.life..Work` = "Imp: Work",
`Important.in.life..Religion` = "Imp: Religion",
`Interest.in.politics` = "Political Interest",
`Most.people.can.be.trusted` = "Trust People",
`How.much.freedom.of.choice.and.control` = "Free. v. Control",
`Jobs.scarce..Employers.should.give.priority.to..nation..people.than.immigrants` = "Nation First",
`Jobs.scarce..Men.should.have.more.right.to.a.job.than.women` = "Men First",
`Woman.as.a.single.parent` = "Single Parent",
`Men.make.better.political.leaders.than.women.do` = "Male Leaders",
`Aims.of.country..first.choice` = "Strong Economy",
`Most.important..first.choice` = "Stable Economy",
`Willingness.to.fight.for.country` = "Willing to Fight",
`Self.positioning.in.political.scale` = "Left-Right",
`Income.equality` = "Inc. Inequality",
`Private.vs.state.ownership.of.business` = "Bus. v. State",
`Government.responsibility` = "Govt. Resp.",
`Political.system..Having.a.strong.leader` = "Strong Leader",
`Political.system..Having.a.democratic.political.system` = "Democracy",
`How.proud.of.nationality` = "National Pride",
`Religious.person` = "Religiosity",
`Feeling.of.happiness` = "Happiness")
results_home <- merge(results_home, sds, by="y")
results_home$size <- abs(results_home$b / results_home$sd)
# effect sizes
summary(results_home$size[results_home$t>2.576])
nrow(results_home[results_home$size < .2 & results_home$t>2.576,]) /
nrow(results_home[results_home$t>2.576,])
nrow(results_home)
# compare
home <- read.csv(file=paste0(path.name, "/output/results_home.csv"))
interview <- read.csv(file=paste0(path.name, "/output/results_interview.csv"))
compare <- merge(home, interview, by=c("y","x"), suffixes = c(".home",".interview"))
mean(compare$t.interview)
mean(compare$t.home)
write.table(results_interview, file=paste0(path.name, "/output/results_interview.csv"), sep = ",", row.names=FALSE)
# save results
write.table(results_behave, file=paste0(path.name, "/output/results_behave.csv"), sep = ",", row.names=FALSE)
results_behave <- read.csv(file=paste0(path.name, "/output/results_behave.csv"), sep=",")
# relabel results
results_behave$y <- recode(results_behave$y, `How.often.do.you.attend.religious.services` = "Religious Attendance",
`Family.savings.during.past.year` = "Family Savings",
`How.many.children.do.you.have` = "Number of Children",
`Political.action..signing.a.petition` = "Ever Petition")
# calculate the median t-stat per dv
results_behave <- results_behave %>%
group_by(y) %>%
mutate(med_t = median(t))
# calculate the proportion of results that are significant at p < .01, and order
# dvs by that proportion
results_behave <- results_behave %>%
group_by(y) %>%
mutate(pct01 = length(t[t>2.576])/length(t))
results_behave <- data.frame(results_behave)
to.order <- aggregate(results_behave, by=list(results_behave$y),
FUN=mean, na.rm=TRUE)
to.order$Group.1 <- reorder(to.order$Group.1, -to.order$pct01)
results_behave$orderedys <- results_behave$y
results_behave$orderedys <- factor(results_behave$orderedys,
levels = names(table(to.order$Group.1)))  # Reordering group factor levels
################ NOW CREATE PLOTS ################
setwd(paste0(path.name, "/output"))
# all values results
p <- ggplot(data = results_home, mapping = aes(x=t))
p + geom_density(color="gray", fill="gray") +
geom_vline(aes(xintercept=median(t))) + geom_vline(aes(xintercept=2.576), color="black", linetype="dashed") +
theme_classic() +   xlab("|t-statistic| of linguistic feature variable") + ylab("Density") +
annotate("text", x=c(10,10), y=c(.13,.15), label=c("median |t-statistic|", "t = 2.576"), color=c("black","black")) +
annotate("segment", x=5, xend=3.5, y=.13, yend=.13, arrow=arrow(length=unit(.05, "inches"))) +
annotate("segment", x=5, xend=2.8, y=.15, yend=.15, color="black", arrow=arrow(length=unit(.05, "inches")))
p_out <- p + geom_density(color="gray", fill="gray") +
geom_vline(aes(xintercept=median(t))) + geom_vline(aes(xintercept=2.576), color="black", linetype="dashed") +
theme_classic() +   xlab("|t-statistic| of linguistic feature variable") + ylab("Density") +
annotate("text", x=c(10,10), y=c(.13,.15), label=c("median |t-statistic|", "t = 2.576"), color=c("black","black")) +
annotate("segment", x=6, xend=3.5, y=.13, yend=.13, arrow=arrow(length=unit(.05, "inches"))) +
annotate("segment", x=6, xend=2.8, y=.15, yend=.15, color="black", arrow=arrow(length=unit(.05, "inches")))
ggsave("figure1.pdf", plot=p_out, height=3, width=6, units="in")
# values results by DVs
p + geom_density(color="gray", fill="gray") + ylab("Density") +
theme_classic() + facet_wrap(~orderedys, ncol = 5) +
geom_vline(aes(xintercept=med_t)) + geom_vline(aes(xintercept=2.576), color="black", linetype="dashed")  +
xlab("|t-statistic| of linguistic feature variable") +
theme(strip.background = element_rect(colour="white", fill="white"))
p_out <- p + geom_density(color="gray", fill="gray") + ylab("Density") +
theme_classic() + facet_wrap(~orderedys, ncol = 5) +
geom_vline(aes(xintercept=med_t)) + geom_vline(aes(xintercept=2.576), color="black", linetype="dashed")  +
xlab("|t-statistic| of linguistic feature variable") +
theme(strip.background = element_rect(colour="white", fill="white"))
ggsave("figure2.pdf", plot=p_out, height=4, width=6, units="in")
# behavioral results by DVs
p <- ggplot(data = results_behave, mapping = aes(x=t))
p + geom_density(color="gray", fill="gray") + ylab("Density") +
theme_classic() + facet_wrap(~orderedys, ncol = 2) +
geom_vline(aes(xintercept=med_t)) + geom_vline(aes(xintercept=2.576), color="black", linetype="dashed")  +
xlab("|t-statistic| of linguistic feature variable") +
theme(strip.background = element_rect(colour="white", fill="white"))
p_out <- p + geom_density(color="gray", fill="gray") + ylab("Density") +
theme_classic() + facet_wrap(~orderedys, ncol = 2) +
geom_vline(aes(xintercept=med_t)) + geom_vline(aes(xintercept=2.576), color="black", linetype="dashed")  +
xlab("|t-statistic| of linguistic feature variable") +
theme(strip.background = element_rect(colour="white", fill="white"))
ggsave("figure3.pdf", plot=p_out, height=2.5, width=6, units="in")
# APPENDIX FIGURE A1
# comparison of language at home and language of interview
p <- ggplot(data = compare, mapping = aes(x=t.home,y=t.interview))
p + geom_point(shape=1, color="black", fill="white") + theme_classic() +
geom_hline(aes(yintercept=2.576), color="black", linetype="dashed") + geom_vline(aes(xintercept=2.576), color="black", linetype="dashed") +
ylab("|t-statistic| using Language of Interview") + xlab("|t-statistic| using Language at Home") +
scale_x_log10(breaks = scales::trans_breaks("log10", function(x) 10^x),
labels = scales::trans_format("log10", scales::math_format(10^.x))) +
scale_y_log10(breaks = scales::trans_breaks("log10", function(x) 10^x),
labels = scales::trans_format("log10", scales::math_format(10^.x)))
p_out <- p + geom_point(shape=1, color="black", fill="white") + theme_classic() +
geom_hline(aes(yintercept=2.576), color="black", linetype="dashed") + geom_vline(aes(xintercept=2.576), color="black", linetype="dashed") +
ylab("|t-statistic| using Language of Interview (Log scale)") + xlab("|t-statistic| using Language at Home (Log scale)") +
scale_x_log10(breaks = scales::trans_breaks("log10", function(x) 10^x),
labels = scales::trans_format("log10", scales::math_format(10^.x))) +
scale_y_log10(breaks = scales::trans_breaks("log10", function(x) 10^x),
labels = scales::trans_format("log10", scales::math_format(10^.x)))
ggsave("figureA1.pdf", plot=p_out, height=4, width=6, units="in")
